package org.yonggan.shop.sql.report

import org.apache.spark.sql.DataFrame
import org.yonggan.shop.constant.ConfigurationManager
import org.yonggan.shop.utils.SparkUtils

/**
  * Reports how users are distributed across regions.
  *
  * Reads the JSON input located at `ConfigurationManager.TASK_INJSON`, keeps rows whose
  * `openid` is not the empty string, de-duplicates on (`openid`, `province`, `city`) and
  * counts the remaining rows per (`province`, `city`) pair.
  */
object RptArea {

  /**
    * Job entry point: builds the per-region report and prints it with `show()`.
    *
    * The Spark session is stopped in a `finally` block so it is released whether the
    * job succeeds or fails.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val spark = SparkUtils.getSparkSession("统计用户按照地域分布")

    try {
      // Load the input records (this is the job's input, not its output).
      val jsonDF: DataFrame = spark.read.json(ConfigurationManager.TASK_INJSON)

      areaReport(jsonDF).show()
    } finally {
      spark.stop()
    }

  }

  /**
    * Builds the region report from the raw records.
    *
    * @param jsonDF input records; the code reads the columns `openid`, `province` and `city`
    * @return a DataFrame with columns `province`, `city` and `ctn`, where `ctn` is the number
    *         of distinct (`openid`, `province`, `city`) rows in that (`province`, `city`) group
    */
  private def areaReport(jsonDF: DataFrame): DataFrame = {
    import org.apache.spark.sql.functions._

    jsonDF.filter(
      """
      | openid != ""
      """.stripMargin)
      // De-duplicate first so each openid is counted at most once per (province, city).
      .select("openid", "province", "city").distinct
      .groupBy("province", "city")
      .agg(count("*") as ("ctn"))
      .select("province", "city", "ctn")
  }

}
